library(tidyverse)
library(GGally) # for ggpairs()
library(pdp) # for partial dependence plots
library(gridExtra)
library(knitr)
library(patchwork) # for figure insets etc.
library(ggtext)
library(betareg)
library(ggplot2)
library(terra)
library(sf)
theme_set(theme_classic())
Data compiled in the prepDataForModels.R script
modDat <- readRDS("../../../Data_processed/CoverData/DataForModels_spatiallyAveraged_withSoils_noSf.rds")
Here are the climate variables (30-year lag) we could potentially use in the models – however I think we should remove the 5th and 95th percentile variables here, since we’re interested in long-term averages rather than extremes
kable(modDat %>%
select(swe_meanAnnAvg_30yr:durationFrostFreeDays_meanAnnAvg_30yr) %>%
names()
)
| x |
|---|
| swe_meanAnnAvg_30yr |
| tmin_meanAnnAvg_30yr |
| tmax_meanAnnAvg_30yr |
| tmean_meanAnnAvg_30yr |
| vp_meanAnnAvg_30yr |
| prcp_meanAnnTotal_30yr |
| T_warmestMonth_meanAnnAvg_30yr |
| T_coldestMonth_meanAnnAvg_30yr |
| precip_wettestMonth_meanAnnAvg_30yr |
| precip_driestMonth_meanAnnAvg_30yr |
| precip_Seasonality_meanAnnAvg_30yr |
| PrecipTempCorr_meanAnnAvg_30yr |
| aboveFreezing_month_meanAnnAvg_30yr |
| isothermality_meanAnnAvg_30yr |
| annWaterDeficit_meanAnnAvg_30yr |
| annWetDegDays_meanAnnAvg_30yr |
| annVPD_mean_meanAnnAvg_30yr |
| annVPD_max_meanAnnAvg_30yr |
| annVPD_min_meanAnnAvg_30yr |
| annVPD_max_95percentile_30yr |
| annWaterDeficit_95percentile_30yr |
| annWetDegDays_5percentile_30yr |
| durationFrostFreeDays_5percentile_30yr |
| durationFrostFreeDays_meanAnnAvg_30yr |
Here are the weather anomaly variables (5-year lag) we could potentially use in the models
kable(modDat %>%
select(swe_meanAnn_5yrAnom:durationFrostFreeDays_meanAnnAvg_5yrAnom) %>%
names()
)
| x |
|---|
| swe_meanAnn_5yrAnom |
| tmean_meanAnnAvg_5yrAnom |
| tmin_meanAnnAvg_5yrAnom |
| tmax_meanAnnAvg_5yrAnom |
| vp_meanAnnAvg_5yrAnom |
| prcp_meanAnnTotal_5yrAnom |
| T_warmestMonth_meanAnnAvg_5yrAnom |
| T_coldestMonth_meanAnnAvg_5yrAnom |
| precip_wettestMonth_meanAnnAvg_5yrAnom |
| precip_driestMonth_meanAnnAvg_5yrAnom |
| precip_Seasonality_meanAnnAvg_5yrAnom |
| PrecipTempCorr_meanAnnAvg_5yrAnom |
| aboveFreezing_month_meanAnnAvg_5yrAnom |
| isothermality_meanAnnAvg_5yrAnom |
| annWaterDeficit_meanAnnAvg_5yrAnom |
| annWetDegDays_meanAnnAvg_5yrAnom |
| annVPD_mean_meanAnnAvg_5yrAnom |
| annVPD_min_meanAnnAvg_5yrAnom |
| annVPD_max_meanAnnAvg_5yrAnom |
| annVPD_max_95percentile_5yrAnom |
| annWaterDeficit_95percentile_5yrAnom |
| annWetDegDays_5percentile_5yrAnom |
| durationFrostFreeDays_5percentile_5yrAnom |
| durationFrostFreeDays_meanAnnAvg_5yrAnom |
And here are the soil variables we could potentially use in the models
kable(modDat %>%
select(soilDepth:totalAvailableWaterHoldingCapacity) %>%
names()
)
| x |
|---|
| soilDepth |
| surfaceClay_perc |
| avgSandPerc_acrossDepth |
| avgCoarsePerc_acrossDepth |
| avgOrganicCarbonPerc_0_3cm |
| totalAvailableWaterHoldingCapacity |
This is using a subset of the data (50,000 rows), just for runtime purposes
Below is the correlation between only climate predictors that are averaged across 30 years * excluded 95th and 5th percentile variables * Dropped tmin, tmax, t_warmest month, and t_coldest month and replaced w/ MAT * Dropped month above freezing (highly correlated with frost-free days in all ecoregions)
# function to add colors to correlation plot
my_fn <- function(data, mapping, method="p", use="pairwise", ...){
# grab data
x <- eval_data_col(data, mapping$x)
y <- eval_data_col(data, mapping$y)
# calculate correlation
corr <- cor(x, y, method=method, use=use)
# calculate colour based on correlation value
# Here I have set a correlation of minus one to blue,
# zero to white, and one to red
# Change this to suit: possibly extend to add as an argument of `my_fn`
colFn <- colorRampPalette(c("red", "white", "blue"), interpolate ='spline')
fill <- colFn(100)[findInterval(corr, seq(-1, 1, length=100))]
ggally_cor(data = data, mapping = mapping, stars = FALSE,
digits = 2, colour = I("black"),...) +
theme_void() +
theme(panel.background = element_rect(fill=fill))
}
(corrPlot_30yrMean <-
modDat %>%
select(c(swe_meanAnnAvg_30yr, tmean_meanAnnAvg_30yr:prcp_meanAnnTotal_30yr,
precip_wettestMonth_meanAnnAvg_30yr:PrecipTempCorr_meanAnnAvg_30yr,
isothermality_meanAnnAvg_30yr:annVPD_min_meanAnnAvg_30yr,
durationFrostFreeDays_meanAnnAvg_30yr)) %>%
rename("swe" = swe_meanAnnAvg_30yr, "tmean" = tmean_meanAnnAvg_30yr,
"vp" = vp_meanAnnAvg_30yr, "prcp" = prcp_meanAnnTotal_30yr,
"prcp \n Wettest" = precip_wettestMonth_meanAnnAvg_30yr, "prcpDriest" = precip_driestMonth_meanAnnAvg_30yr,
"prcp \n Seasonality" = precip_Seasonality_meanAnnAvg_30yr, "prcp \n TempCorr" = PrecipTempCorr_meanAnnAvg_30yr, "isothermality" = isothermality_meanAnnAvg_30yr,
"Wat \n Deficit" = annWaterDeficit_meanAnnAvg_30yr, "Wet \n DegDays" = annWetDegDays_meanAnnAvg_30yr,
"VPD \n mean" = annVPD_mean_meanAnnAvg_30yr, "VPD \n max" = annVPD_max_meanAnnAvg_30yr, "VPD \n min" = annVPD_min_meanAnnAvg_30yr,
"frost \n FreeDays" = durationFrostFreeDays_meanAnnAvg_30yr) %>%
slice_sample(n = 5e4) %>%
#select(-matches("_")) %>%
ggpairs( upper = list(continuous = my_fn), lower = list(continuous = GGally::wrap("points", alpha = 0.1, size=0.2)), progress = FALSE))
# get data for model fitting
modRegions <- readRDS("../../../Data_processed/CoverData/DataForModels_withSubEcoreg.rds")
# also get data for ecoregions
ecoRegions <- sf::st_read("../../../Data_raw/Level1Ecoregions/", layer = "NA_CEC_Eco_Level1")
## Reading layer `NA_CEC_Eco_Level1' from data source
## `/Users/astears/Documents/Dropbox_static/Work/NAU_USGS_postdoc/PED_vegClimModels/Data_raw/Level1Ecoregions'
## using driver `ESRI Shapefile'
## Simple feature collection with 2140 features and 5 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -4334052 ymin: -3313739 xmax: 3324076 ymax: 4267265
## Projected CRS: Sphere_ARC_INFO_Lambert_Azimuthal_Equal_Area
# filter the ecoregion data by CONUS boundary
# add the ecsoregions together as we've done in the model data
CONUS <- st_read("../../../Data_raw/CONUS_extent", "CONUS_boundary") %>%
st_transform(st_crs(ecoRegions)) %>%
st_make_valid()
## Reading layer `CONUS_boundary' from data source
## `/Users/astears/Documents/Dropbox_static/Work/NAU_USGS_postdoc/PED_vegClimModels/Data_raw/CONUS_extent'
## using driver `ESRI Shapefile'
## Simple feature collection with 10 features and 1 field
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -124.6813 ymin: 25.12993 xmax: -67.00742 ymax: 49.38323
## Geodetic CRS: GCS_unknown
# trim the ecoregions by CONUS extent
ecoRegCrop <- ecoRegions %>%
st_make_valid() %>%
st_intersection( CONUS)
unique(st_drop_geometry(modRegions)[,c("NA_L1NAME", "newRegion")])
## NA_L1NAME newRegion
## 1 NORTHWESTERN FORESTED MOUNTAINS westForest
## 2 NORTH AMERICAN DESERTS dryShrubGrass
## 3 TEMPERATE SIERRAS westForest
## 56 SOUTHERN SEMIARID HIGHLANDS dryShrubGrass
## 1119 GREAT PLAINS dryShrubGrass
## 8227 MEDITERRANEAN CALIFORNIA dryShrubGrass
## 8248 MARINE WEST COAST FOREST westForest
## 8315 <NA> <NA>
## 9672 EASTERN TEMPERATE FORESTS eastForest
## 15184 NORTHERN FORESTS eastForest
## 77979 TROPICAL WET FORESTS eastForest
## 100882 WATER <NA>
ecoRegCrop$newRegion <- NA
ecoRegCrop[ecoRegCrop$NA_L1NAME %in% c("NORTHWESTERN FORESTED MOUNTAINS", "TEMPERATE SIERRAS", "MARINE WEST COAST FOREST"), "newRegion"] <- "Western Forests"
ecoRegCrop[ecoRegCrop$NA_L1NAME %in% c("EASTERN TEMPERATE FORESTS", "NORTHERN FORESTS", "TROPICAL WET FORESTS"), "newRegion"] <- "Eastern Forests"
ecoRegCrop[ecoRegCrop$NA_L1NAME %in% c("NORTH AMERICAN DESERTS", "SOUTHERN SEMIARID HIGHLANDS", "GREAT PLAINS", "MEDITERRANEAN CALIFORNIA"), "newRegion"] <- "Grass and Shrubland"
ecoReg <- ecoRegCrop %>%
aggregate(by = list(ecoRegCrop$newRegion),
FUN = unique,
do_union = TRUE)
saveRDS(ecoReg, "../../../Data_processed/CoverData/ecoRegionExtents.RDS")
ggplot(data = ecoReg) +
geom_sf(aes(fill = newRegion)) +
scale_fill_discrete(guide =
guide_legend(title = c("Ecoregion")))
# pdf(file = "../../../figures/EcoregionMap.pdf")
# ggplot(data = ecoReg) +
# geom_sf(aes(fill = newRegion)) +
# scale_fill_discrete(guide =
# guide_legend(title = c("Ecoregion")))
# dev.off()
#modDat_ecoregions <- readRDS("../../../data/DataForModels_withEcoregion.rds")
# just use modDat data
# get data
plotDat <-
modDat %>%
filter(newRegion == "dryShrubGrass") %>%
#st_drop_geometry() %>%
select(c(swe_meanAnnAvg_30yr, tmean_meanAnnAvg_30yr:prcp_meanAnnTotal_30yr,
precip_wettestMonth_meanAnnAvg_30yr:PrecipTempCorr_meanAnnAvg_30yr,
isothermality_meanAnnAvg_30yr:annVPD_min_meanAnnAvg_30yr,
durationFrostFreeDays_meanAnnAvg_30yr)) %>%
rename("swe" = swe_meanAnnAvg_30yr, "tmean" = tmean_meanAnnAvg_30yr,
"vp" = vp_meanAnnAvg_30yr, "prcp" = prcp_meanAnnTotal_30yr,
"prcp \n Wettest" = precip_wettestMonth_meanAnnAvg_30yr, "prcpDriest" = precip_driestMonth_meanAnnAvg_30yr,
"prcp \n Seasonality" = precip_Seasonality_meanAnnAvg_30yr, "prcp \n TempCorr" = PrecipTempCorr_meanAnnAvg_30yr, "isothermality" = isothermality_meanAnnAvg_30yr,
"Wat \n Deficit" = annWaterDeficit_meanAnnAvg_30yr, "Wet \n DegDays" = annWetDegDays_meanAnnAvg_30yr,
"VPD \n mean" = annVPD_mean_meanAnnAvg_30yr, "VPD \n max" = annVPD_max_meanAnnAvg_30yr, "VPD \n min" = annVPD_min_meanAnnAvg_30yr,
"frost \n FreeDays" = durationFrostFreeDays_meanAnnAvg_30yr) %>%
drop_na(swe, tmean, prcp, "prcp \n TempCorr", isothermality, "Wat \n Deficit",
"Wet \n DegDays", "VPD \n max" ) %>%
slice_sample(n = 5e5)
#select(-matches("_")) %>%
p1 <- ggpairs(plotDat, upper = list(continuous = my_fn),
lower = list(continuous = GGally::wrap("points", alpha = 0.1, size=0.2)), title = "Dry shrubland and grassland", progress = FALSE)
print(p1)
#modDat_ecoregions <- readRDS("../../../data/DataForModels_withEcoregion.rds")
# just use modDat data
# get data
plotDat <-
modDat %>%
filter(newRegion == "eastForest") %>%
#st_drop_geometry() %>%
select(c(swe_meanAnnAvg_30yr, tmean_meanAnnAvg_30yr:prcp_meanAnnTotal_30yr,
precip_wettestMonth_meanAnnAvg_30yr:PrecipTempCorr_meanAnnAvg_30yr,
isothermality_meanAnnAvg_30yr:annVPD_min_meanAnnAvg_30yr,
durationFrostFreeDays_meanAnnAvg_30yr)) %>%
rename("swe" = swe_meanAnnAvg_30yr, "tmean" = tmean_meanAnnAvg_30yr,
"vp" = vp_meanAnnAvg_30yr, "prcp" = prcp_meanAnnTotal_30yr,
"prcp \n Wettest" = precip_wettestMonth_meanAnnAvg_30yr, "prcpDriest" = precip_driestMonth_meanAnnAvg_30yr,
"prcp \n Seasonality" = precip_Seasonality_meanAnnAvg_30yr, "prcp \n TempCorr" = PrecipTempCorr_meanAnnAvg_30yr, "isothermality" = isothermality_meanAnnAvg_30yr,
"Wat \n Deficit" = annWaterDeficit_meanAnnAvg_30yr, "Wet \n DegDays" = annWetDegDays_meanAnnAvg_30yr,
"VPD \n mean" = annVPD_mean_meanAnnAvg_30yr, "VPD \n max" = annVPD_max_meanAnnAvg_30yr, "VPD \n min" = annVPD_min_meanAnnAvg_30yr,
"frost \n FreeDays" = durationFrostFreeDays_meanAnnAvg_30yr) %>%
drop_na(swe, tmean, prcp, "prcp \n TempCorr", isothermality, "Wat \n Deficit",
"Wet \n DegDays", "VPD \n max" ) %>%
slice_sample(n = 5e5)
#select(-matches("_")) %>%
p1 <- ggpairs(plotDat, upper = list(continuous = my_fn),
lower = list(continuous = GGally::wrap("points", alpha = 0.1, size=0.2)),
title = "Eastern Forests", progress = FALSE)
print(p1)
#modDat_ecoregions <- readRDS("../../../data/DataForModels_withEcoregion.rds")
# just use modDat data
# get data
plotDat <-
modDat %>%
filter(newRegion == "westForest") %>%
#st_drop_geometry() %>%
select(c(swe_meanAnnAvg_30yr, tmean_meanAnnAvg_30yr:prcp_meanAnnTotal_30yr,
precip_wettestMonth_meanAnnAvg_30yr:annVPD_min_meanAnnAvg_30yr, durationFrostFreeDays_meanAnnAvg_30yr)) %>%
rename("swe" = swe_meanAnnAvg_30yr, "tmean" = tmean_meanAnnAvg_30yr,
"vp" = vp_meanAnnAvg_30yr, "prcp" = prcp_meanAnnTotal_30yr,
"prcp \n Wettest" = precip_wettestMonth_meanAnnAvg_30yr, "prcpDriest" = precip_driestMonth_meanAnnAvg_30yr,
"prcp \n Seasonality" = precip_Seasonality_meanAnnAvg_30yr, "prcp \n TempCorr" = PrecipTempCorr_meanAnnAvg_30yr,
"abvFreeze \n Month" = aboveFreezing_month_meanAnnAvg_30yr, "isothermality" = isothermality_meanAnnAvg_30yr,
"Wat \n Deficit" = annWaterDeficit_meanAnnAvg_30yr, "Wet \n DegDays" = annWetDegDays_meanAnnAvg_30yr,
"VPD \n mean" = annVPD_mean_meanAnnAvg_30yr, "VPD \n max" = annVPD_max_meanAnnAvg_30yr, "VPD \n min" = annVPD_min_meanAnnAvg_30yr,
"frost \n FreeDays" = durationFrostFreeDays_meanAnnAvg_30yr) %>%
drop_na(swe, tmean, prcp, "prcp \n TempCorr", isothermality, "Wat \n Deficit",
"Wet \n DegDays", "VPD \n max" ) %>%
slice_sample(n = 5e5)
#select(-matches("_")) %>%
p1 <- ggpairs(plotDat, upper = list(continuous = my_fn),
lower = list(continuous = GGally::wrap("points", alpha = 0.1, size=0.2)),
title = "Western Forests", progress = FALSE)
print(p1)